*********************************************
* Title: rwanda_jde_tableA16-A17.do
* Author: Todd Pugatch
* Last update: June 10 2024
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
* Inputs: 	student_merge_jde.dta
* Outputs: 	rwanda_jde_tableA16-A17.txt
*			rwanda_jde_tableA16-A17[a-b].out
* Notes: produces Tables A16-A17
**********************************************

* Record the start time; it is displayed next to the end time when the run finishes
local start=`"$S_TIME"'

* Reset the session: data in memory, matrices, Mata, user-defined programs, graphs
clear
clear matrix
clear mata
program drop _all
graph drop _all

* Close any log that is still open (capture suppresses the error when none is)
capture log close

* Display and graph settings
set more off
set scheme s2mono
set autotabgraphs on

* Set directories 
* Every path below is built from the global "main" (root folder of the
* replication package). This file does not define it: either uncomment and
* edit the line below, or define it before running this do-file.
*global main "/Users/dhorvath/Dropbox (MIT)/Data Publication/rwanda_final/rwanda_replication_package"

* Stop early with a clear message if "main" is undefined; otherwise all paths
* would silently resolve relative to the file-system root.
if `"$main"' == "" {
	di as error "global main is not set: define the replication package root before running this do-file"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"

* Open the text log for this run, overwriting any earlier version
log using "$temp/rwanda_jde_tableA16-A17.txt", text replace

* DATA PREP
* Load the cleaned student-level data set
quietly use "$cleandata/student_merge_jde.dta", clear

* Winsorize (upper tail only, 1%) all financial variables used in analysis.
* For each wave suffix (el, bl) and each stub in the matching list, the source
* variable <stub>_<wave> produces the new variable <stub>w_<wave>.
local fin_el "earn_last2mths_usd profits_last2mths_adj_usd earn_alt_adj_usd"
local fin_bl "earn_last2mths_usd business_inc_last2mths_usd"
foreach wave in el bl {
	foreach stub of local fin_`wave' {
		local raw "`stub'_`wave'"
		local wins "`stub'w_`wave'"
		winsor `raw', gen(`wins') p(.01) highonly
		label variable `wins' "`raw', winsorized at 99th percentile"
	}
}

* Same treatment for conditional savings, handled separately from the lists above
winsor savings_usd_cond_el, gen(savings_usd_condw_el) p(0.01) highonly
label variable savings_usd_condw_el "savings_usd_cond_el, winsorized at 99th percentile"


* prep missing values for students without baseline outcome
/*first, impute zero for (winsorized) business income for those who don't report owning a business 
	(original version conditioned on business ownership)*/
qui replace business_inc_last2mths_usdw_bl=0 if ownbusiness_bl==0

/*now impute control group mean to remaining missing values.
	For each baseline variable y this creates:
		yi = y, with missing values replaced by the control-group (treatment==0) mean of y
		ym = indicator equal to 1 when y is missing*/
local Y0 "ownbusiness_bl earn_money_bl earn_last2mths_usdw_bl business_inc_last2mths_usdw_bl anysavings_bl savings_less5k_bl savings_5kto10k_bl savings_more10k_bl"
foreach y0 in `Y0' {
	qui gen `y0'i=`y0'
	* summarize must run BEFORE r(mean) is used; otherwise the imputed value
	* is the mean left over from the previous variable (or missing on the first pass)
	qui su `y0' if treatment==0
	qui replace `y0'i=r(mean) if `y0'==.	
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

* ANALYSIS
/*	Regress outcome on treatment status, controlling for randomization strata. 
	Cluster s.e.'s by school.
	For Lee bounds, proceed in 3 steps:
		1) regress outcome on baseline and strata indicators
		2) get residuals
		3) calculate Lee bounds on these residuals
	Calculate Lee bounds first, then the regression. This allows for exporting the
		bounds into the regression results. Note that this method does not always
		produce bounds containing the original coefficient, because the treatment
		effect from the residualized regression is not exactly equal to the
		original treatment effect (because of potentially spurious correlation
		between treatment and covariates).*/
* define program for obtaining Lee bounds
* Intended to be called right after an estimation command: it uses that
* command's e(sample) and predictions. Leaves the bounds in scalars lb and ub.
program define getleebounds
	* residuals from the most recent estimation, estimation sample only
	quietly predict e if e(sample), residuals
	* Lee bounds of treatment on the residuals, with insample_el as the selection indicator
	quietly leebounds e treatment, select(insample_el)
	* keep the lower and upper bound for later use
	scalar lb = _b[lower]
	scalar ub = _b[upper]
	* remove the temporary residual variable so the program can be called again
	drop e
end

/*1. What is treatment effect on unconditional savings and business profit?
	--initial analysis found negative treatment effects, but conditioned on any
		savings or on business ownership
	--show conditional and unconditional results as comparison
	--also try log (excluding zeroes) and inverse hyperbolic sine (including
		zeroes) specifications*/
* Unconditional savings: winsorize the upper tail, then set to zero for those reporting no savings
winsor savings_usd_el, gen(savings_usd_w_el) p(0.01) highonly
qui replace savings_usd_w_el=0 if anysavings_el==0
* ln() is missing for non-positive values, so zeroes drop out of the log specification;
* asinh() is defined at zero, so they stay in the inverse hyperbolic sine specification
qui gen lsavings_usd_w_el=ln(savings_usd_w_el)
qui gen ihssavings_usd_w_el=asinh(savings_usd_w_el)
lab var savings_usd_w_el "savings_usd_el, unconditional, winsorized at 99th percentile"
* the "ln" label belongs to lsavings_usd_w_el (it previously overwrote the label of the level variable)
lab var lsavings_usd_w_el "ln savings_usd_el, unconditional, winsorized at 99th percentile"
lab var ihssavings_usd_w_el "inv hyp sine savings_usd_el, unconditional, winsorized at 99th percentile"

* Unconditional profits: start from winsorized profits, set to zero when personal_business_el==0
qui gen profits_last2mths_uc_el=profits_last2mths_adj_usdw_el
qui replace profits_last2mths_uc_el=0 if personal_business_el==0
qui gen lprofits_last2mths_uc_el=ln(profits_last2mths_uc_el)
qui gen ihsprofits_last2mths_uc_el=asinh(profits_last2mths_uc_el)
lab var profits_last2mths_uc_el "business profit last 2 months, unconditional, winsorized at 99th percentile"
lab var lprofits_last2mths_uc_el "ln business profit last 2 months, unconditional, winsorized at 99th percentile"
lab var ihsprofits_last2mths_uc_el "inv hyp sine business profit last 2 months, unconditional, winsorized at 99th percentile"

* box plots of distributions (conditional, omitting outliers)
* Four graphs are built in memory (nodraw) as g1-g4 and then displayed in turn:
*	g1/g2 = profits, conditional/unconditional; g3/g4 = savings, conditional/unconditional.
* All four plot the winsorized variables, as stated in each graph's note.
graph box profits_last2mths_adj_usdw_el, over(treatment) nooutsides ///
	title("Profits, endline") subtitle("conditional on business participation") ///
	ytitle("profits last 2 months (USD)")  ///
	note("Winsorized at 99th percentile. Does not plot outside values.") ///
	nodraw name(g1)
graph box profits_last2mths_uc_el, over(treatment) nooutsides ///
	title("Profits, endline") subtitle("unconditional") ///
	ytitle("profits last 2 months (USD)") /// 
	note("Winsorized at 99th percentile. Does not plot outside values.") ///
	nodraw name(g2)
* uses the winsorized conditional savings variable (previously the raw variable, contradicting the note)
graph box savings_usd_condw_el, over(treatment) nooutsides ///
	title("Savings, endline") subtitle("conditional on any savings") ///
	ytitle("savings (USD)") /// 
	note("Winsorized at 99th percentile. Does not plot outside values.") ///
	nodraw name(g3)
graph box savings_usd_w_el, over(treatment) nooutsides ///
	title("Savings, endline") subtitle("unconditional") ///
	ytitle("savings (USD)")  ///
	note("Winsorized at 99th percentile. Does not plot outside values.") ///
	nodraw name(g4)	
forval g=1/4 {
	graph display g`g'
}

* savings
* Outcomes, in output order: conditional savings (winsorized), then the
* unconditional level, log and inverse hyperbolic sine versions.
* Controls: imputed baseline savings-category variables and their missing-value indicators.
local stat ""control mean",mu_c"
local y0 "savings_less5k_bli savings_5kto10k_bli savings_more10k_bli savings_less5k_blm savings_5kto10k_blm savings_more10k_blm"
local Y "savings_usd_condw_el savings_usd_w_el lsavings_usd_w_el ihssavings_usd_w_el"

* The first regression starts a fresh results file; all later ones are appended to it
local mode "replace"
foreach y of local Y {
	* treatment effect with strata fixed effects, s.e.'s clustered by school
	quietly areg `y' treatment `y0', absorb(strata) cluster(school_code)
	* control-group mean of the outcome within the estimation sample
	quietly summarize `y' if treatment==0 & e(sample)
	scalar mu_c = r(mean)
	outreg2 treatment using "$results/rwanda_jde_tableA16-A17a.xls", se excel nolabel nocons addstat(`stat') `mode'
	local mode "append"
}

* profits
* Outcomes: conditional profits (winsorized), then the unconditional level, log and
* inverse hyperbolic sine versions. Controls: imputed baseline business income and
* its missing-value indicator. Results are appended to the savings table file.
local stat ""control mean",mu_c,"baseline mean",mu_0"
local y0 "business_inc_last2mths_usdw_bli business_inc_last2mths_usdw_blm"
local Y "profits_last2mths_adj_usdw_el profits_last2mths_uc_el lprofits_last2mths_uc_el ihsprofits_last2mths_uc_el"
foreach y of local Y {
	* treatment effect with strata fixed effects, s.e.'s clustered by school
	quietly areg `y' treatment `y0', absorb(strata) cluster(school_code)
	* control-group mean of the outcome within the estimation sample
	quietly summarize `y' if treatment==0 & e(sample)
	scalar mu_c = r(mean)
	* mean of (imputed) baseline business income within the estimation sample
	quietly summarize business_inc_last2mths_usdw_bli if e(sample)
	scalar mu_0 = r(mean)
	outreg2 treatment using "$results/rwanda_jde_tableA16-A17a.xls", se excel nolabel nocons addstat(`stat') append
}

/*2. Did treatment affect business expenses also? 
	This could be channel for negative effects on savings and profits. Use same
	conditional/unconditional specifications as above.*/
/*prep data*/
* winsorize the upper tail of expenses, then build the unconditional version
* (zero when personal_business_el==0) and its log / inverse hyperbolic sine transforms
winsor expenses_last2mths_adj_usd_el, gen(expenses_last2mths_adj_usdw_el) p(.01) highonly
qui gen expenses_last2mths_uc_el=expenses_last2mths_adj_usdw_el
qui replace expenses_last2mths_uc_el=0 if personal_business_el==0
* ln() is missing for non-positive values, so zeroes are excluded from the log specification
qui gen lexpenses_last2mths_uc_el=ln(expenses_last2mths_uc_el)
qui gen ihsexpenses_last2mths_uc_el=asinh(expenses_last2mths_uc_el)
* label names the raw source variable, consistent with the other winsorized variables
lab var expenses_last2mths_adj_usdw_el "expenses_last2mths_adj_usd_el, winsorized at 99th percentile"
lab var expenses_last2mths_uc_el "business expenses last 2 months, unconditional, winsorized at 99th percentile"
lab var lexpenses_last2mths_uc_el "ln business expenses last 2 months, unconditional, winsorized at 99th percentile"
lab var ihsexpenses_last2mths_uc_el "inv hyp sine business expenses last 2 months, unconditional, winsorized at 99th percentile"
	
/*regressions*/
* same specification as the profit regressions; results are appended to the same table file
local stat ""control mean",mu_c"
local y0 "business_inc_last2mths_usdw_bli business_inc_last2mths_usdw_blm"
local Y "expenses_last2mths_adj_usdw_el expenses_last2mths_uc_el lexpenses_last2mths_uc_el ihsexpenses_last2mths_uc_el"
foreach y in `Y' {
	qui areg `y' treatment `y0', a(strata) cluster(school_code)
	* control-group mean of the outcome within the estimation sample
	qui su `y' if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	outreg2 treatment using "$results/rwanda_jde_tableA16-A17a.xls", se excel nolabel nocons addstat(`stat') append
}

/*3. Did treatment affect the activities businesses do?
	Run unconditional and conditional on business participation.*/
* Controls: imputed baseline business ownership and its missing-value indicator.
* Output: the first regression creates the table file (replace); every later
* regression must be appended, otherwise each call overwrites the previous columns.
local stat ""control mean",mu_c"
local Y "createprod finrecords paidtax registered"
local y0 "ownbusiness_bli ownbusiness_blm"

* unconditional
* projideas
qui areg business_projideas_el treatment `y0', a(strata) cluster(school_code)
qui su business_projideas_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
outreg2 treatment using "$results/rwanda_jde_tableA16-A17b.xls", se excel nolabel nocons addstat(`stat') replace
	

foreach y in `Y' {
	qui areg business_`y'_el treatment `y0', a(strata) cluster(school_code)
	* control-group mean of the outcome within the estimation sample
	qui su business_`y'_el if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	outreg2 treatment using "$results/rwanda_jde_tableA16-A17b.xls", se excel nolabel nocons addstat(`stat') append
}

* conditional on business participation
local Y "projideas createprod finrecords paidtax registered"
foreach y in `Y' {
	qui areg business_`y'_el treatment `y0' if personal_business_el==1, a(strata) cluster(school_code)
	* e(sample) already reflects the personal_business_el==1 restriction
	qui su business_`y'_el if treatment==0 & e(sample)
	scalar define mu_c=r(mean)
	outreg2 treatment using "$results/rwanda_jde_tableA16-A17b.xls", se excel nolabel nocons addstat(`stat') append
}
	
* Record the end time, show start and end times in the log, then close the log
local end=`"$S_TIME"' 
display "`start'"
display "`end'"
log close
